In this analysis, I looked into:
Which gender rides the bikes the most
The average duration of each ride
The age range of people who take rides
The hour of the day when people take the most rides, and so on.
The dataset consists of 183412 rows and 16 columns of different datatypes.
# import all packages and set plots to be embedded inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
%matplotlib inline
# suppress warnings from final output
import warnings
warnings.simplefilter("ignore")
# Load the trip data CSV into a DataFrame.
bike = pd.read_csv('201902-fordgobike-tripdata.csv')

# Data wrangling: fix null values.
# Missing birth years and genders are filled with each column's most frequent
# value. The filled column is assigned back instead of calling
# `fillna(..., inplace=True)` on a column selection: that form is chained
# assignment, which is not guaranteed to update `bike` (and does not under
# pandas Copy-on-Write).
for column in ('member_birth_year', 'member_gender'):
    bike[column] = bike[column].fillna(bike[column].mode()[0])

# Drop every row that still has a missing value in any remaining column.
bike.dropna(inplace=True)
# Convert columns to more appropriate dtypes: both trip timestamps become
# datetimes and the birth year becomes an integer.
timestamp_format = '%Y-%m-%d %H:%M:%S'
for time_column in ('start_time', 'end_time'):
    bike[time_column] = pd.to_datetime(bike[time_column], format=timestamp_format)
bike['member_birth_year'] = bike['member_birth_year'].astype(int)
# Work on a copy so that the wrangled frame `bike` is left untouched.
bike_clean = bike.copy()

# Derive extra columns from the timestamps and the birth year:
#   start_weekday_name / end_weekday_name - day name of trip start / end
#   hh                                    - start hour as a zero-padded string
#   member_age                            - age relative to the year 2019
start_times = bike_clean['start_time']
end_times = bike_clean['end_time']
bike_clean['start_weekday_name'] = start_times.dt.day_name()
bike_clean['end_weekday_name'] = end_times.dt.day_name()
bike_clean['hh'] = start_times.dt.strftime('%H')
bike_clean['member_age'] = 2019 - bike_clean['member_birth_year']
bike_clean['member_age']
0 35
1 31
2 47
3 30
4 45
..
183407 23
183408 35
183409 29
183410 31
183411 30
Name: member_age, Length: 183215, dtype: int32
-- There are more subscribers than customers
-- There are more males than females
-- More younger people than older people ride the bikes
# Pie chart of the share of rides taken by each user type.
count = bike_clean['user_type'].value_counts()
# Take the wedge labels from the counts themselves so each wedge is always
# named after the value it measures; a hard-coded label list is only correct
# while the ordering of the counts happens to agree with it.
label = count.index.tolist()
colors = ["#8c564b", "#ff7f0e"]
fig = plt.figure(figsize=(10, 7))
plt.pie(count, labels=label, autopct='%1.1f%%', colors=colors)
plt.title("User Type ")
# show plot
plt.show()
# Horizontal bar chart of rides per gender, with the bars ordered by how
# often each gender occurs.
gender = bike_clean['member_gender'].value_counts().index
base_color = sns.color_palette()[0]
plt.title("Count per gender ")
sns.countplot(
    data=bike_clean,
    y='member_gender',
    order=gender,
    color=base_color,
);
# Interactive histogram of the riders' ages.
fig = px.histogram(
    data_frame=bike_clean,
    x='member_age',
    title='Age per rides',
)
fig.show()
-- There are more trips on weekdays than on weekends
-- Most of the trips are made around 5pm and 8am, which we deduce is due to rush hours.
# Ride counts per weekday in two stacked subplots: the day a trip started
# (top) and the day it ended (bottom), both in calendar order.
weekdays = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
fig, ax = plt.subplots(nrows=2, figsize=[8, 8])
default_color = sns.color_palette()[0]
sns.countplot(data=bike_clean, x='start_weekday_name', color=default_color, ax=ax[0], order=weekdays).set(title='Rides per weekdays')
# Target the bottom axes explicitly instead of relying on whichever axes
# pyplot currently treats as the active one.
sns.countplot(data=bike_clean, x='end_weekday_name', color=default_color, ax=ax[1], order=weekdays)
plt.show()
# Horizontal bar chart of rides per start hour, with the bars ordered by how
# often each hour occurs.
hour = bike_clean['hh'].value_counts().index
base_color = sns.color_palette()[0]
plt.figure(figsize=[15, 10]);
plt.title("Count per hour ")
sns.countplot(
    data=bike_clean,
    y='hh',
    order=hour,
    color=base_color,
);
# Export this notebook as a reveal.js slide deck with code inputs and prompts
# hidden, then serve it locally (`--post serve`).
# NOTE(review): the recorded run wrote the slides HTML but the serve step
# failed with WinError 10048 when binding its socket - presumably the port was
# still held by an earlier serve; confirm and stop that process before re-running.
!jupyter nbconvert Project_three_II.ipynb --to slides --post serve --no-input --no-prompt
[NbConvertApp] Converting notebook Project_three_II.ipynb to slides
[NbConvertApp] Writing 4799212 bytes to Project_three_II.slides.html
[NbConvertApp] Redirecting reveal.js requests to https://cdnjs.cloudflare.com/ajax/libs/reveal.js/3.5.0
Traceback (most recent call last):
File "C:\Users\SAOBAN\Documents\Saoban_Anaconda\Scripts\jupyter-nbconvert-script.py", line 10, in <module>
sys.exit(main())
File "C:\Users\SAOBAN\Documents\Saoban_Anaconda\lib\site-packages\jupyter_core\application.py", line 254, in launch_instance
return super(JupyterApp, cls).launch_instance(argv=argv, **kwargs)
File "C:\Users\SAOBAN\Documents\Saoban_Anaconda\lib\site-packages\traitlets\config\application.py", line 845, in launch_instance
app.start()
File "C:\Users\SAOBAN\Documents\Saoban_Anaconda\lib\site-packages\nbconvert\nbconvertapp.py", line 350, in start
self.convert_notebooks()
File "C:\Users\SAOBAN\Documents\Saoban_Anaconda\lib\site-packages\nbconvert\nbconvertapp.py", line 524, in convert_notebooks
self.convert_single_notebook(notebook_filename)
File "C:\Users\SAOBAN\Documents\Saoban_Anaconda\lib\site-packages\nbconvert\nbconvertapp.py", line 491, in convert_single_notebook
self.postprocess_single_notebook(write_results)
File "C:\Users\SAOBAN\Documents\Saoban_Anaconda\lib\site-packages\nbconvert\nbconvertapp.py", line 463, in postprocess_single_notebook
self.postprocessor(write_results)
File "C:\Users\SAOBAN\Documents\Saoban_Anaconda\lib\site-packages\nbconvert\postprocessors\base.py", line 28, in __call__
self.postprocess(input)
File "C:\Users\SAOBAN\Documents\Saoban_Anaconda\lib\site-packages\nbconvert\postprocessors\serve.py", line 90, in postprocess
http_server.listen(self.port, address=self.ip)
File "C:\Users\SAOBAN\Documents\Saoban_Anaconda\lib\site-packages\tornado\tcpserver.py", line 151, in listen
sockets = bind_sockets(port, address=address)
File "C:\Users\SAOBAN\Documents\Saoban_Anaconda\lib\site-packages\tornado\netutil.py", line 161, in bind_sockets
sock.bind(sockaddr)
OSError: [WinError 10048] Only one usage of each socket address (protocol/network address/port) is normally permitted